library(dplyr)
library(lubridate)
library(ggplot2)
library(plotly)
library(scales)
library(glue)
#membaca data
sales <- read.csv("Adidas US Sales Datasets.csv")
head(sales)
#>      Retailer Retailer.ID Invoice.Date    Region    State     City
#> 1 Foot Locker     1185732     01-01-20 Northeast New York New York
#> 2 Foot Locker     1185732     02-01-20 Northeast New York New York
#> 3 Foot Locker     1185732     03-01-20 Northeast New York New York
#> 4 Foot Locker     1185732     04-01-20 Northeast New York New York
#> 5 Foot Locker     1185732     05-01-20 Northeast New York New York
#> 6 Foot Locker     1185732     06-01-20 Northeast New York New York
#>                     Product Price.per.Unit Units.Sold Total.Sales
#> 1     Men's Street Footwear             50       1200      600000
#> 2   Men's Athletic Footwear             50       1000      500000
#> 3   Women's Street Footwear             40       1000      400000
#> 4 Women's Athletic Footwear             45        850      382500
#> 5             Men's Apparel             60        900      540000
#> 6           Women's Apparel             50       1000      500000
#>   Operating.Profit Operating.Margin Sales.Method
#> 1           300000               50     In-store
#> 2           150000               30     In-store
#> 3           140000               35     In-store
#> 4           133875               35     In-store
#> 5           162000               30     In-store
#> 6           125000               25     In-store
#mengecek struktur data
sales %>% 
  glimpse()
#> Rows: 9,648
#> Columns: 13
#> $ Retailer         <chr> "Foot Locker", "Foot Locker", "Foot Locker", "Foot Lo…
#> $ Retailer.ID      <int> 1185732, 1185732, 1185732, 1185732, 1185732, 1185732,…
#> $ Invoice.Date     <chr> "01-01-20", "02-01-20", "03-01-20", "04-01-20", "05-0…
#> $ Region           <chr> "Northeast", "Northeast", "Northeast", "Northeast", "…
#> $ State            <chr> "New York", "New York", "New York", "New York", "New …
#> $ City             <chr> "New York", "New York", "New York", "New York", "New …
#> $ Product          <chr> "Men's Street Footwear", "Men's Athletic Footwear", "…
#> $ Price.per.Unit   <int> 50, 50, 40, 45, 60, 50, 50, 50, 40, 45, 60, 50, 50, 5…
#> $ Units.Sold       <int> 1200, 1000, 1000, 850, 900, 1000, 1250, 900, 950, 825…
#> $ Total.Sales      <int> 600000, 500000, 400000, 382500, 540000, 500000, 62500…
#> $ Operating.Profit <int> 300000, 150000, 140000, 133875, 162000, 125000, 31250…
#> $ Operating.Margin <int> 50, 30, 35, 35, 30, 25, 50, 30, 35, 35, 30, 25, 50, 3…
#> $ Sales.Method     <chr> "In-store", "In-store", "In-store", "In-store", "In-s…

data yang perlu diubah strukturnya: as.factor: Retailer, Retailer.ID, Region, State, City, Product, Sales.Method as.datetime: Invoice.Date

#merubah struktur data
sales_clean <- 
sales %>% 
  # fungsi untuk mengubah beberapa kolom secara bersamaan ke tipe data yang sama
  mutate_at(.vars = c("Retailer", "Retailer.ID", "Region", "State", "City", "Product", "Sales.Method"), # parameter untuk memilih kolom yang akan diubah tipe datanya
            .funs = as.factor) %>% # parameter untuk fungsi tipe data yang dituju
  mutate(Invoice.Date = dmy(Invoice.Date))
glimpse(sales_clean)
#> Rows: 9,648
#> Columns: 13
#> $ Retailer         <fct> Foot Locker, Foot Locker, Foot Locker, Foot Locker, F…
#> $ Retailer.ID      <fct> 1185732, 1185732, 1185732, 1185732, 1185732, 1185732,…
#> $ Invoice.Date     <date> 2020-01-01, 2020-01-02, 2020-01-03, 2020-01-04, 2020…
#> $ Region           <fct> Northeast, Northeast, Northeast, Northeast, Northeast…
#> $ State            <fct> New York, New York, New York, New York, New York, New…
#> $ City             <fct> New York, New York, New York, New York, New York, New…
#> $ Product          <fct> Men's Street Footwear, Men's Athletic Footwear, Women…
#> $ Price.per.Unit   <int> 50, 50, 40, 45, 60, 50, 50, 50, 40, 45, 60, 50, 50, 5…
#> $ Units.Sold       <int> 1200, 1000, 1000, 850, 900, 1000, 1250, 900, 950, 825…
#> $ Total.Sales      <int> 600000, 500000, 400000, 382500, 540000, 500000, 62500…
#> $ Operating.Profit <int> 300000, 150000, 140000, 133875, 162000, 125000, 31250…
#> $ Operating.Margin <int> 50, 30, 35, 35, 30, 25, 50, 30, 35, 35, 30, 25, 50, 3…
#> $ Sales.Method     <fct> In-store, In-store, In-store, In-store, In-store, In-…

#Check missing value

anyNA(sales)
#> [1] FALSE

#Plot1 plot title: Total item sold based on date in any region plot type: line plot

#jumlah sales pada setiap retailer di masing2 region dengan cara base
sales_regbase <- aggregate(Total.Sales ~ Retailer + Region, 
                      data = sales_clean, 
                      FUN = sum)
sales_regbase[order(sales_regbase$Total.Sales, decreasing = T),]
#>         Retailer    Region Total.Sales
#> 28     West Gear      West   137551280
#> 7    Foot Locker Northeast    65326474
#> 15 Sports Direct     South    65258121
#> 25        Kohl's      West    62302115
#> 19   Foot Locker Southeast    59669118
#> 20 Sports Direct Southeast    54178543
#> 2    Foot Locker   Midwest    47987394
#> 24   Foot Locker      West    37804709
#> 6         Amazon Northeast    36262590
#> 17     West Gear     South    33087031
#> 16       Walmart     South    33049858
#> 11     West Gear Northeast    32293733
#> 4  Sports Direct   Midwest    26207191
#> 9  Sports Direct Northeast    24698097
#> 5      West Gear   Midwest    22540586
#> 3         Kohl's   Midwest    22229415
#> 21       Walmart Southeast    21005539
#> 22     West Gear Southeast    17491703
#> 1         Amazon   Midwest    16835873
#> 8         Kohl's Northeast    14031168
#> 10       Walmart Northeast    13712005
#> 23        Amazon      West    13365025
#> 26 Sports Direct      West    12129045
#> 18        Amazon Southeast    10826333
#> 13   Foot Locker     South     9307025
#> 27       Walmart      West     6791008
#> 14        Kohl's     South     3552055
#> 12        Amazon     South      409091
#data untuk plot1
sales_region <- 
sales_clean %>% 
  filter(Product %in% "Men's Street Footwear") %>% 
  group_by(Retailer, Region) %>% 
  summarise(Total.Sales = sum(Total.Sales)) %>% 
  arrange(-Total.Sales)
sales_region
#> # A tibble: 28 × 3
#> # Groups:   Retailer [6]
#>    Retailer      Region    Total.Sales
#>    <fct>         <fct>           <int>
#>  1 West Gear     West         27016671
#>  2 Foot Locker   Northeast    18267693
#>  3 Foot Locker   Midwest      15327737
#>  4 Foot Locker   Southeast    13573667
#>  5 Kohl's        West         12792583
#>  6 Sports Direct Southeast    11465945
#>  7 Sports Direct South        10959347
#>  8 Amazon        Northeast    10562789
#>  9 West Gear     Northeast     8587466
#> 10 Foot Locker   West          8081554
#> # ℹ 18 more rows
#jumlah transaksi pada setiap retailer di masing2 region
trans_region <- aggregate(Total.Sales ~ Retailer + Region, 
                      data = sales_clean, 
                      FUN = length)
trans_region[order(trans_region$Total.Sales, decreasing = T),]
#>         Retailer    Region Total.Sales
#> 28     West Gear      West        1207
#> 2    Foot Locker   Midwest         840
#> 7    Foot Locker Northeast         835
#> 15 Sports Direct     South         732
#> 25        Kohl's      West         550
#> 6         Amazon Northeast         541
#> 9  Sports Direct Northeast         462
#> 20 Sports Direct Southeast         427
#> 19   Foot Locker Southeast         422
#> 17     West Gear     South         408
#> 24   Foot Locker      West         396
#> 5      West Gear   Midwest         366
#> 16       Walmart     South         366
#> 11     West Gear Northeast         302
#> 4  Sports Direct   Midwest         286
#> 3         Kohl's   Midwest         244
#> 8         Kohl's Northeast         170
#> 21       Walmart Southeast         150
#> 13   Foot Locker     South         144
#> 1         Amazon   Midwest         136
#> 18        Amazon Southeast         134
#> 23        Amazon      West         126
#> 26 Sports Direct      West         125
#> 22     West Gear Southeast          91
#> 10       Walmart Northeast          66
#> 14        Kohl's     South          66
#> 27       Walmart      West          44
#> 12        Amazon     South          12
# Pembuatan Visual Statis
plot1 <- 
ggplot(data = sales_region, 
       mapping = aes(x=Total.Sales, y=reorder(Retailer, Total.Sales))) +
  geom_col(mapping = aes(fill=Region), position = "stack") +
  labs(title = "Total Sales Based on Region",
       x = "Total Sales",
       y = "Retailer",
       fill="Region") +
  theme_classic()
# Mengubah Visual Statis menjadi Interaktif
ggplotly(plot1, tooltip = 'text')
#data untuk plot2
sales_unitsold <- 
sales_clean %>% 
  filter(Product %in% "Men's Street Footwear") %>% 
  group_by(Retailer, Region) %>% 
  summarise(Units.Sold = sum(Units.Sold)) %>% 
  arrange(-Units.Sold)
sales_unitsold
#> # A tibble: 28 × 3
#> # Groups:   Retailer [6]
#>    Retailer      Region    Units.Sold
#>    <fct>         <fct>          <int>
#>  1 West Gear     West           73294
#>  2 Foot Locker   Northeast      48459
#>  3 Sports Direct South          45756
#>  4 Foot Locker   Midwest        43503
#>  5 Kohl's        West           38004
#>  6 Foot Locker   Southeast      34812
#>  7 Sports Direct Southeast      31110
#>  8 Amazon        Northeast      26969
#>  9 Walmart       South          23759
#> 10 West Gear     South          23341
#> # ℹ 18 more rows
# Pembuatan Visual Statis
plot2 <- 
ggplot(data = sales_unitsold, 
       mapping = aes(x=Units.Sold, y=reorder(Retailer, Units.Sold))) +
  geom_col(mapping = aes(fill=Region), position = "stack") +
  labs(title = "Total Unit Sold Based on Region",
       x = "Total Unit Sold",
       y = "Retailer",
       fill="Region") +
  theme_classic()
# Mengubah Visual Statis menjadi Interaktif
ggplotly(plot1, tooltip = 'text')